library(ggmap)
library(ggplot2)
library(ggmap)
library(maps)
library(mapdata)
library(openintro)
library(plotly)
library(billboarder)
library(vctrs)
library(dplyr)
states <- read.csv("~/Downloads/states.csv")
Breweries <- read.csv("~/Downloads/Breweries.csv")
Beers <- read.csv("~/Downloads/Beers.csv")
dat = Breweries
#obtain the count of the number of breweres by state arranged in decending order
state_count = dat %>% group_by(State) %>% count() %>% arrange(-n)
billboarder() %>%
bb_barchart(state_count) %>%
bb_labs(title = "Count of Breweries by State")
#Here we are creating heatmap to show quanties.
heat_map = ggplot(data = states) +
geom_polygon(aes(x = long, y = lat, fill = n, group = group), color = "white") +
coord_fixed(1.3) +
scale_fill_gradientn(colours = rev(heat.colors(10)),na.value = "grey90",
guide = guide_colourbar(barwidth = 5, barheight = 0.4,
#put legend title on top of legend
title.position = "top")) +
labs(title = "Count of Breweries by State (Continental US)")
ggplotly(heat_map)
#rename col to match Brewery Data set
colnames(Beers)[5]="Brew_ID"
#merge data by Brew_ID
final=merge(Beers,Breweries,by=c("Brew_ID"))
head(final,n=6)
Brew_ID Name.x Beer_ID ABV IBU Style
1 1 Get Together 2692 0.045 50 American IPA
2 1 Maggie's Leap 2691 0.049 26 Milk / Sweet Stout
3 1 Wall's End 2690 0.048 19 English Brown Ale
4 1 Pumpion 2689 0.060 38 Pumpkin Ale
5 1 Stronghold 2688 0.060 25 American Porter
6 1 Parapet ESB 2687 0.056 47 Extra Special / Strong Bitter (ESB)
Ounces Name.y City State
1 16 NorthGate Brewing Minneapolis MN
2 16 NorthGate Brewing Minneapolis MN
3 16 NorthGate Brewing Minneapolis MN
4 16 NorthGate Brewing Minneapolis MN
5 16 NorthGate Brewing Minneapolis MN
6 16 NorthGate Brewing Minneapolis MN
tail(final,n=6)
Brew_ID Name.x Beer_ID ABV IBU
2405 556 Pilsner Ukiah 98 0.055 NA
2406 557 Heinnieweisse Weissebier 52 0.049 NA
2407 557 Snapperhead IPA 51 0.068 NA
2408 557 Moo Thunder Stout 50 0.049 NA
2409 557 Porkslap Pale Ale 49 0.043 NA
2410 558 Urban Wilderness Pale Ale 30 0.049 NA
Style Ounces Name.y City
2405 German Pilsener 12 Ukiah Brewing Company Ukiah
2406 Hefeweizen 12 Butternuts Beer and Ale Garrattsville
2407 American IPA 12 Butternuts Beer and Ale Garrattsville
2408 Milk / Sweet Stout 12 Butternuts Beer and Ale Garrattsville
2409 American Pale Ale (APA) 12 Butternuts Beer and Ale Garrattsville
2410 English Pale Ale 12 Sleeping Lady Brewing Company Anchorage
State
2405 CA
2406 NY
2407 NY
2408 NY
2409 NY
2410 AK
#Of the missing values, 1005 are missing for "IBU", 5 missing values for "Style"", and 62 missing values for ABV, and 1,005 missing values for IBU.
#One method would be to omit the observations with missing data which isn't ideal since we will lose quite a few observations
omita=na.omit(final, cols = "ABV")
omiti=na.omit(final, cols = "IBU")
#The other method would be to impute the median values of the variable to the missing variables.
#Find Medians of Style, IBU, and ABV
#find median of ABV and IBU
omita%>%group_by(State)%>%summarise(median(ABV),median(IBU))
# A tibble: 50 x 3
State `median(ABV)` `median(IBU)`
<fct> <dbl> <dbl>
1 " AK" 0.057 46
2 " AL" 0.06 43
3 " AR" 0.04 39
4 " AZ" 0.0575 20.5
5 " CA" 0.058 42
6 " CO" 0.065 40
7 " CT" 0.061 29
8 " DC" 0.059 47.5
9 " DE" 0.055 52
10 " FL" 0.062 55
# … with 40 more rows
When we exclude all of the missing values, we find that Maine has both the highest median of ABV and IBU amongst all states. THe highest ABV is London Balling from KY. The most bitter beer (highest IBU) is “Bitter Bitch Imperial IPA” from OR.
medianalc=as.data.frame(omita%>%group_by(State)%>%summarise(median(ABV)))
medianibu=as.data.frame(omiti%>%group_by(State)%>%summarise(median(IBU)))
#label columns
colnames(medianalc)=c("State","ABV")
colnames(medianibu)=c("State","IBU")
#sort in descending order
medianalc = medianalc %>% arrange(-ABV)
medianibu = medianibu %>% arrange(-IBU)
#create charts
billboarder() %>%
bb_barchart(medianalc) %>%
bb_labs(title = "Median ABV of Breweries by State")
billboarder() %>%
bb_barchart(medianibu) %>%
bb_labs(title = "Median IBU of Breweries by State")
#Find beer with MAX ABV and MAX IBU
omita[omita$ABV==max(omita$ABV),]
Brew_ID Name.x Beer_ID ABV IBU Style Ounces
8 2 London Balling 2685 0.125 80 English Barleywine 16
Name.y City State
8 Against the Grain Brewery Louisville KY
omiti[omiti$IBU==max(omiti$IBU),]
Brew_ID Name.x Beer_ID ABV IBU
1857 375 Bitter Bitch Imperial IPA 980 0.082 138
Style Ounces Name.y City
1857 American Double / Imperial IPA 12 Astoria Brewing Company Astoria
State
1857 OR
We see that the distribution to be right skewed (the mean of 0.05977 is greater than the median of 0.05600) as seen in the histogram. We wee a range of 0.001 - 0.128 in the data for ABV. There are 62 missing values that we exclude from the analysis.
summary(omita$ABV)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.02700 0.05000 0.05700 0.05991 0.06800 0.12500
final %>% ggplot(aes(ABV*100)) + geom_histogram() + xlab("Alcohol by Volume") + ggtitle("Distribution of Beer ABV")
Looking at the plot, there does appear to be an association between IBU and ABV. As the ABV increases, we see IBU tends to increase as well. Theres seems to be a direct positive relationship between the two variables. There are some extreme outliers with extremely high ABV seen in the far right of the plot.
ggplot(data=omita) + geom_point(aes(x=ABV,y=IBU)) + ggtitle ("IBU vs ABV (Excluding all Missing Values)")